library
library(dplyr)
##
## 다음의 패키지를 부착합니다: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(keras)
library(tensorflow)
library(corrplot)
## corrplot 0.92 loaded
library(plotly)
## 필요한 패키지를 로딩중입니다: ggplot2
##
## 다음의 패키지를 부착합니다: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
tensorflow cpu 사용
# Pass an empty device list for the "GPU" device type so that TensorFlow is
# given no visible GPU (CPU-only run).
tf$config$set_visible_devices(devices = list(), device_type = "GPU")
# Show the physical devices TensorFlow detects; the GPU is still reported here
# because this call lists physical devices, not the visible ones.
tf$config$list_physical_devices()
## [[1]]
## PhysicalDevice(name='/physical_device:CPU:0', device_type='CPU')
##
## [[2]]
## PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')
데이터 불러오기
# Read the training and validation tables from CSV files located in the
# current working directory.
hardness_data_set_tr <- read.csv(file = "Hardness data set_tr.csv")
hardness_data_set_val <- read.csv(file = "Hardness data set_val.csv")
데이터 확인
# Print the column names, types and first values of the training table.
str(hardness_data_set_tr)
## 'data.frame': 800 obs. of 28 variables:
## $ Number : int 1 2 3 4 5 6 7 8 9 10 ...
## $ X : int -35 -35 -35 -35 -35 -35 -25 -25 -25 -25 ...
## $ Y : int -25 25 15 -15 5 -5 35 -35 25 -25 ...
## $ Al : num 0 0 0 0 0 0 0 0 0 0 ...
## $ Ti : num 26.9 46.8 42.9 30.1 37.8 ...
## $ Cr : num 13.3 12.1 13 13.6 14.2 ...
## $ Fe : num 0 0 0 0 0 0 0 0 0 0 ...
## $ Co : num 59.8 41.1 44.1 56.2 48 ...
## $ Ni : num 0 0 0 0 0 0 0 0 0 0 ...
## $ Cu : num 0 0 0 0 0 0 0 0 0 0 ...
## $ Zr : num 0 0 0 0 0 0 0 0 0 0 ...
## $ Mo : num 0 0 0 0 0 0 0 0 0 0 ...
## $ W : num 0 0 0 0 0 0 0 0 0 0 ...
## $ Mn : int 0 0 0 0 0 0 0 0 0 0 ...
## $ Si : int 0 0 0 0 0 0 0 0 0 0 ...
## $ Mg : num 0 0 0 0 0 0 0 0 0 0 ...
## $ Re : num 0 0 0 0 0 0 0 0 0 0 ...
## $ Ta : num 0 0 0 0 0 0 0 0 0 0 ...
## $ Thickness: num 567 504 517 552 538 ...
## $ Hardness : num 11.5 13 13.8 11.7 14.6 ...
## $ Modulus : num 197 209 224 232 192 ...
## $ ravg : num 0.137 0.145 0.143 0.138 0.141 ...
## $ delta : num 0.14 0.148 0.148 0.143 0.148 ...
## $ dHmix : num -20.3 -23.9 -23.7 -21.4 -22.9 ...
## $ ENavg : num 1.78 1.73 1.74 1.77 1.75 ...
## $ dEN : num 0.122 0.125 0.125 0.123 0.125 ...
## $ N : int 3 3 3 3 3 3 3 3 3 3 ...
## $ Compo : chr "Co/Cr/Ti" "Co/Cr/Ti" "Co/Cr/Ti" "Co/Cr/Ti" ...
# Print the column names, types and first values of the validation table.
str(hardness_data_set_val)
## 'data.frame': 200 obs. of 28 variables:
## $ Number : int 801 802 803 804 805 806 807 808 809 810 ...
## $ X : int -5 -5 5 5 5 5 15 15 15 15 ...
## $ Y : int 15 35 -35 -15 5 25 -25 -5 15 35 ...
## $ Al : num 55.2 59.3 40.2 44.9 48.6 ...
## $ Ti : num 0 0 0 0 0 0 0 0 0 0 ...
## $ Cr : num 21.8 16.8 41.3 33.7 26.4 ...
## $ Fe : num 0 0 0 0 0 0 0 0 0 0 ...
## $ Co : int 0 0 0 0 0 0 0 0 0 0 ...
## $ Ni : num 0 0 0 0 0 0 0 0 0 0 ...
## $ Cu : int 0 0 0 0 0 0 0 0 0 0 ...
## $ Zr : int 0 0 0 0 0 0 0 0 0 0 ...
## $ Mo : int 0 0 0 0 0 0 0 0 0 0 ...
## $ W : int 0 0 0 0 0 0 0 0 0 0 ...
## $ Mn : int 0 0 0 0 0 0 0 0 0 0 ...
## $ Si : int 0 0 0 0 0 0 0 0 0 0 ...
## $ Mg : num 23 24 18.4 21.4 25 ...
## $ Re : int 0 0 0 0 0 0 0 0 0 0 ...
## $ Ta : int 0 0 0 0 0 0 0 0 0 0 ...
## $ Thickness: num 498 513 414 454 480 ...
## $ Hardness : num 7.15 7.14 7.03 6.63 7.08 ...
## $ Modulus : num 176 157 177 159 166 ...
## $ ravg : num 0.475 0.489 0.404 0.449 0.502 ...
## $ delta : num 0.699 0.706 0.658 0.687 0.717 ...
## $ dHmix : num -1.0071 -1.2507 0.0672 0.102 0.2297 ...
## $ ENavg : num 1.55 1.55 1.58 1.56 1.55 ...
## $ dEN : num 0.134 0.134 0.128 0.134 0.139 ...
## $ N : int 3 3 3 3 3 3 3 3 3 3 ...
## $ Compo : chr "Mg/Al/Cr" "Mg/Al/Cr" "Mg/Al/Cr" "Mg/Al/Cr" ...
결측치 확인
# Tabulate NA (TRUE) versus non-NA (FALSE) cells, first for the training
# table and then for the validation table.
table(is.na(hardness_data_set_tr))
table(is.na(hardness_data_set_val))
##
## FALSE
## 22400
##
## FALSE TRUE
## 2927 2673
결측치 위치
제거
# Drop every validation row that contains at least one NA.
# The rows are located with complete.cases() instead of the hard-coded index
# range 101:199, so the step keeps working if the CSV is re-exported with the
# missing rows in a different position. For the file analysed here the NA
# count (2673 = 99 rows x 27 columns) corresponds to those same 99 rows.
hardness_data_set_val <-
  hardness_data_set_val[complete.cases(hardness_data_set_val), ]
summary
# Per-column summary statistics of the training table.
summary(hardness_data_set_tr)
## Number X Y Al
## Min. : 1.0 Min. :-45.0000 Min. :-43.00 Min. : 0.00
## 1st Qu.:200.8 1st Qu.:-15.0000 1st Qu.: -5.00 1st Qu.: 0.00
## Median :400.5 Median : -2.0000 Median : 0.00 Median : 0.00
## Mean :400.5 Mean : -0.6587 Mean : 0.31 Mean : 13.09
## 3rd Qu.:600.2 3rd Qu.: 15.0000 3rd Qu.: 5.00 3rd Qu.: 15.45
## Max. :800.0 Max. : 35.0000 Max. : 43.00 Max. :100.00
## Ti Cr Fe Co
## Min. : 0.000 Min. : 0.00 Min. : 0.00 Min. : 0.000
## 1st Qu.: 0.000 1st Qu.: 0.00 1st Qu.: 0.00 1st Qu.: 0.000
## Median : 0.000 Median : 0.00 Median : 0.00 Median : 0.000
## Mean : 6.953 Mean : 19.48 Mean : 19.74 Mean : 4.323
## 3rd Qu.: 0.000 3rd Qu.: 27.12 3rd Qu.: 32.27 3rd Qu.: 0.000
## Max. :60.578 Max. :100.00 Max. :100.00 Max. :67.523
## Ni Cu Zr Mo
## Min. : 0.00 Min. : 0.000 Min. : 0.000 Min. : 0.000
## 1st Qu.: 0.00 1st Qu.: 0.000 1st Qu.: 0.000 1st Qu.: 0.000
## Median : 0.00 Median : 0.000 Median : 0.000 Median : 0.000
## Mean : 17.76 Mean : 6.033 Mean : 7.189 Mean : 1.335
## 3rd Qu.: 23.90 3rd Qu.: 0.000 3rd Qu.: 0.000 3rd Qu.: 0.000
## Max. :100.00 Max. :100.000 Max. :83.826 Max. :34.647
## W Mn Si Mg Re
## Min. : 0.000 Min. :0 Min. :0 Min. : 0.00 Min. : 0.0000
## 1st Qu.: 0.000 1st Qu.:0 1st Qu.:0 1st Qu.: 0.00 1st Qu.: 0.0000
## Median : 0.000 Median :0 Median :0 Median : 0.00 Median : 0.0000
## Mean : 2.954 Mean :0 Mean :0 Mean : 0.74 Mean : 0.2225
## 3rd Qu.: 0.000 3rd Qu.:0 3rd Qu.:0 3rd Qu.: 0.00 3rd Qu.: 0.0000
## Max. :100.000 Max. :0 Max. :0 Max. :23.52 Max. :13.6640
## Ta Thickness Hardness Modulus
## Min. : 0.000 Min. : 91.4 Min. : 2.297 Min. : 95.37
## 1st Qu.: 0.000 1st Qu.: 393.5 1st Qu.: 8.614 1st Qu.:164.02
## Median : 0.000 Median : 493.7 Median : 9.764 Median :185.49
## Mean : 0.184 Mean : 526.9 Mean : 9.836 Mean :182.72
## 3rd Qu.: 0.000 3rd Qu.: 604.1 3rd Qu.:11.611 3rd Qu.:203.50
## Max. :12.845 Max. :1252.0 Max. :19.368 Max. :300.05
## ravg delta dHmix ENavg
## Min. :0.1240 Min. :0.000000 Min. :-43.91 Min. :1.415
## 1st Qu.:0.1247 1st Qu.:0.000000 1st Qu.:-19.83 1st Qu.:1.627
## Median :0.1280 Median :0.003976 Median : -3.39 Median :1.784
## Mean :0.1445 Mean :0.067295 Mean :-10.47 Mean :1.754
## 3rd Qu.:0.1433 3rd Qu.:0.109187 3rd Qu.: 0.00 3rd Qu.:1.830
## Max. :0.4906 Max. :0.690492 Max. : 0.00 Max. :2.360
## dEN N Compo
## Min. :0.00000 Min. :1.000 Length:800
## 1st Qu.:0.00000 1st Qu.:1.000 Class :character
## Median :0.09058 Median :3.000 Mode :character
## Mean :0.08502 Mean :2.438
## 3rd Qu.:0.12439 3rd Qu.:3.000
## Max. :0.29124 Max. :4.000
# Per-column summary statistics of the validation table (after NA-row removal).
summary(hardness_data_set_val)
## Number X Y Al
## Min. : 801 Min. :-35.00 Min. :-43.0000 Min. : 0.00
## 1st Qu.: 826 1st Qu.:-15.00 1st Qu.:-25.0000 1st Qu.: 0.00
## Median : 851 Median : 5.00 Median : 5.0000 Median :40.24
## Mean : 852 Mean : 2.03 Mean : -0.2277 Mean :30.58
## 3rd Qu.: 876 3rd Qu.: 15.00 3rd Qu.: 15.0000 3rd Qu.:47.65
## Max. :1000 Max. : 35.00 Max. : 35.0000 Max. :65.32
## Ti Cr Fe Co Ni
## Min. : 0.00 Min. : 0.00 Min. : 0.000 Min. :0 Min. : 0.000
## 1st Qu.: 0.00 1st Qu.: 0.00 1st Qu.: 0.000 1st Qu.:0 1st Qu.: 0.000
## Median :18.41 Median : 0.00 Median : 0.000 Median :0 Median : 0.000
## Mean :17.86 Mean :16.68 Mean : 9.667 Mean :0 Mean : 0.227
## 3rd Qu.:35.07 3rd Qu.:32.35 3rd Qu.:23.582 3rd Qu.:0 3rd Qu.: 0.000
## Max. :54.87 Max. :54.71 Max. :42.238 Max. :0 Max. :22.925
## Cu Zr Mo W Mn Si
## Min. :0 Min. :0 Min. :0 Min. :0 Min. :0 Min. :0
## 1st Qu.:0 1st Qu.:0 1st Qu.:0 1st Qu.:0 1st Qu.:0 1st Qu.:0
## Median :0 Median :0 Median :0 Median :0 Median :0 Median :0
## Mean :0 Mean :0 Mean :0 Mean :0 Mean :0 Mean :0
## 3rd Qu.:0 3rd Qu.:0 3rd Qu.:0 3rd Qu.:0 3rd Qu.:0 3rd Qu.:0
## Max. :0 Max. :0 Max. :0 Max. :0 Max. :0 Max. :0
## Mg Re Ta Thickness Hardness
## Min. : 0.00 Min. :0 Min. :0 Min. :279.9 Min. : 4.410
## 1st Qu.:20.43 1st Qu.:0 1st Qu.:0 1st Qu.:418.7 1st Qu.: 6.860
## Median :25.00 Median :0 Median :0 Median :452.7 Median : 7.293
## Mean :24.99 Mean :0 Mean :0 Mean :450.3 Mean : 7.252
## 3rd Qu.:30.26 3rd Qu.:0 3rd Qu.:0 3rd Qu.:483.2 3rd Qu.: 7.723
## Max. :39.29 Max. :0 Max. :0 Max. :574.8 Max. :11.739
## Modulus ravg delta dHmix
## Min. :122.0 Min. :0.1474 Min. :0.1458 Min. :-21.9551
## 1st Qu.:148.9 1st Qu.:0.4381 1st Qu.:0.6686 1st Qu.: -4.3164
## Median :159.9 Median :0.5064 Median :0.7128 Median : -0.4221
## Mean :159.3 Mean :0.5066 Mean :0.6977 Mean : 1.0934
## 3rd Qu.:171.4 3rd Qu.:0.5837 3rd Qu.:0.7441 3rd Qu.: 10.5022
## Max. :186.7 Max. :0.7188 Max. :0.7874 Max. : 16.3099
## ENavg dEN N Compo
## Min. :1.509 Min. :0.1065 Min. :3 Length:101
## 1st Qu.:1.539 1st Qu.:0.1391 1st Qu.:3 Class :character
## Median :1.558 Median :0.1488 Median :3 Mode :character
## Mean :1.567 Mean :0.1541 Mean :3
## 3rd Qu.:1.581 3rd Qu.:0.1700 3rd Qu.:3
## Max. :1.696 Max. :0.2028 Max. :3
상관계수 그래프
- 훈련 데이터의 Mn과 Si는 모두 0값이므로 제외
# Pearson correlation matrix of training columns 2:26. Positions 13 and 14 of
# that selection (Mn and Si) are removed first because they are constant
# (all zero) in the training data.
corr_input <- hardness_data_set_tr[, 2:26]
corr_input <- corr_input[, -(13:14)]
cor_data <- cor(corr_input, method = "pearson")
# Render the matrix with the coefficients printed as numbers.
corrplot(cor_data, method = "number")

box_그래프
# Names of columns 2:26 of the training table, used as box-plot trace labels.
names_list <- names(hardness_data_set_tr)[2:26]
훈련데이터의 box그래프
# Box plot of the raw training data: one box per column 2:26.
# Column 2 starts the figure; columns 3:26 are appended as further traces.
tr_fig <- plot_ly(
  y = hardness_data_set_tr[, 2],
  type = "box",
  quartilemethod = "linear",
  name = names_list[1]
)
for (col_idx in 3:26) {
  # names_list starts at table column 2, hence the offset of one.
  tr_fig <- add_trace(
    tr_fig,
    y = hardness_data_set_tr[, col_idx],
    type = "box",
    quartilemethod = "linear",
    name = names_list[col_idx - 1]
  )
}
tr_fig
검증데이터의 box그래프
# Box plot of the raw validation data: one box per column 2:26.
# Column 2 starts the figure; columns 3:26 are appended as further traces.
val_fig <- plot_ly(
  y = hardness_data_set_val[, 2],
  type = "box",
  quartilemethod = "linear",
  name = names_list[1]
)
for (col_idx in 3:26) {
  # names_list starts at table column 2, hence the offset of one.
  val_fig <- add_trace(
    val_fig,
    y = hardness_data_set_val[, col_idx],
    type = "box",
    quartilemethod = "linear",
    name = names_list[col_idx - 1]
  )
}
val_fig
- XY값 기준 min(-45) ~ max(45) 이므로 XY 좌표값은 0~1사이로
정규화
- 조성물성의 비율(0~100%)또한 0~1사이로 정규화
- 나머지는 훈련데이터의 min, max값으로 정규화
# Keep only the modelling columns (table columns 2:26, i.e. X ... dEN).
sel_hardness_data_set_tr <- hardness_data_set_tr[, 2:26]
sel_hardness_data_set_val <- hardness_data_set_val[, 2:26]

# Coordinates X and Y: shift by +45, then min-max scale with the fixed
# bounds min_pos = 0 and max_pos = 90.
max_pos <- 45 * 2
min_pos <- 0
for (axis in c("X", "Y")) {
  shifted_tr <- sel_hardness_data_set_tr[[axis]] + 45
  shifted_val <- sel_hardness_data_set_val[[axis]] + 45
  sel_hardness_data_set_tr[[axis]] <- (shifted_tr - min_pos) / (max_pos - min_pos)
  sel_hardness_data_set_val[[axis]] <- (shifted_val - min_pos) / (max_pos - min_pos)
}

# Composition columns (positions 3:17, Al ... Ta) are percentages; convert
# them to fractions.
composition_cols <- 3:17
sel_hardness_data_set_tr[, composition_cols] <-
  sel_hardness_data_set_tr[, composition_cols] * 0.01
sel_hardness_data_set_val[, composition_cols] <-
  sel_hardness_data_set_val[, composition_cols] * 0.01

# Remaining columns (18 onwards): min-max scale both sets using the minimum
# and maximum of the TRAINING column, so validation values may fall outside
# the [0, 1] interval.
for (col_idx in 18:ncol(sel_hardness_data_set_tr)) {
  train_col <- sel_hardness_data_set_tr[, col_idx]
  col_max <- max(train_col)
  col_min <- min(train_col)
  sel_hardness_data_set_tr[, col_idx] <- (train_col - col_min) / (col_max - col_min)
  sel_hardness_data_set_val[, col_idx] <-
    (sel_hardness_data_set_val[, col_idx] - col_min) / (col_max - col_min)
}

# Show the scaled ranges of both sets.
summary(sel_hardness_data_set_tr)
summary(sel_hardness_data_set_val)
## X Y Al Ti
## Min. :0.0000 Min. :0.02222 Min. :0.0000 Min. :0.00000
## 1st Qu.:0.3333 1st Qu.:0.44444 1st Qu.:0.0000 1st Qu.:0.00000
## Median :0.4778 Median :0.50000 Median :0.0000 Median :0.00000
## Mean :0.4927 Mean :0.50344 Mean :0.1309 Mean :0.06953
## 3rd Qu.:0.6667 3rd Qu.:0.55556 3rd Qu.:0.1545 3rd Qu.:0.00000
## Max. :0.8889 Max. :0.97778 Max. :1.0000 Max. :0.60578
## Cr Fe Co Ni
## Min. :0.0000 Min. :0.0000 Min. :0.00000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.00000 1st Qu.:0.0000
## Median :0.0000 Median :0.0000 Median :0.00000 Median :0.0000
## Mean :0.1948 Mean :0.1974 Mean :0.04323 Mean :0.1776
## 3rd Qu.:0.2712 3rd Qu.:0.3227 3rd Qu.:0.00000 3rd Qu.:0.2390
## Max. :1.0000 Max. :1.0000 Max. :0.67523 Max. :1.0000
## Cu Zr Mo W
## Min. :0.00000 Min. :0.00000 Min. :0.00000 Min. :0.00000
## 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.00000
## Median :0.00000 Median :0.00000 Median :0.00000 Median :0.00000
## Mean :0.06033 Mean :0.07189 Mean :0.01335 Mean :0.02954
## 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:0.00000
## Max. :1.00000 Max. :0.83826 Max. :0.34647 Max. :1.00000
## Mn Si Mg Re Ta
## Min. :0 Min. :0 Min. :0.0000 Min. :0.000000 Min. :0.00000
## 1st Qu.:0 1st Qu.:0 1st Qu.:0.0000 1st Qu.:0.000000 1st Qu.:0.00000
## Median :0 Median :0 Median :0.0000 Median :0.000000 Median :0.00000
## Mean :0 Mean :0 Mean :0.0074 Mean :0.002225 Mean :0.00184
## 3rd Qu.:0 3rd Qu.:0 3rd Qu.:0.0000 3rd Qu.:0.000000 3rd Qu.:0.00000
## Max. :0 Max. :0 Max. :0.2352 Max. :0.136640 Max. :0.12845
## Thickness Hardness Modulus ravg
## Min. :0.0000 Min. :0.0000 Min. :0.0000 Min. :0.000000
## 1st Qu.:0.2603 1st Qu.:0.3700 1st Qu.:0.3354 1st Qu.:0.001848
## Median :0.3466 Median :0.4374 Median :0.4403 Median :0.010911
## Mean :0.3752 Mean :0.4416 Mean :0.4267 Mean :0.055824
## 3rd Qu.:0.4418 3rd Qu.:0.5456 3rd Qu.:0.5283 3rd Qu.:0.052628
## Max. :1.0000 Max. :1.0000 Max. :1.0000 Max. :1.000000
## delta dHmix ENavg dEN
## Min. :0.000000 Min. :0.0000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.000000 1st Qu.:0.5484 1st Qu.:0.2244 1st Qu.:0.0000
## Median :0.005759 Median :0.9228 Median :0.3909 Median :0.3110
## Mean :0.097460 Mean :0.7615 Mean :0.3587 Mean :0.2919
## 3rd Qu.:0.158130 3rd Qu.:1.0000 3rd Qu.:0.4392 3rd Qu.:0.4271
## Max. :1.000000 Max. :1.0000 Max. :1.0000 Max. :1.0000
## X Y Al Ti
## Min. :0.1111 Min. :0.02222 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.3333 1st Qu.:0.22222 1st Qu.:0.0000 1st Qu.:0.0000
## Median :0.5556 Median :0.55556 Median :0.4024 Median :0.1841
## Mean :0.5226 Mean :0.49747 Mean :0.3058 Mean :0.1786
## 3rd Qu.:0.6667 3rd Qu.:0.66667 3rd Qu.:0.4765 3rd Qu.:0.3507
## Max. :0.8889 Max. :0.88889 Max. :0.6532 Max. :0.5487
## Cr Fe Co Ni Cu
## Min. :0.0000 Min. :0.00000 Min. :0 Min. :0.00000 Min. :0
## 1st Qu.:0.0000 1st Qu.:0.00000 1st Qu.:0 1st Qu.:0.00000 1st Qu.:0
## Median :0.0000 Median :0.00000 Median :0 Median :0.00000 Median :0
## Mean :0.1668 Mean :0.09667 Mean :0 Mean :0.00227 Mean :0
## 3rd Qu.:0.3235 3rd Qu.:0.23582 3rd Qu.:0 3rd Qu.:0.00000 3rd Qu.:0
## Max. :0.5471 Max. :0.42238 Max. :0 Max. :0.22925 Max. :0
## Zr Mo W Mn Si Mg
## Min. :0 Min. :0 Min. :0 Min. :0 Min. :0 Min. :0.0000
## 1st Qu.:0 1st Qu.:0 1st Qu.:0 1st Qu.:0 1st Qu.:0 1st Qu.:0.2043
## Median :0 Median :0 Median :0 Median :0 Median :0 Median :0.2500
## Mean :0 Mean :0 Mean :0 Mean :0 Mean :0 Mean :0.2499
## 3rd Qu.:0 3rd Qu.:0 3rd Qu.:0 3rd Qu.:0 3rd Qu.:0 3rd Qu.:0.3026
## Max. :0 Max. :0 Max. :0 Max. :0 Max. :0 Max. :0.3929
## Re Ta Thickness Hardness Modulus
## Min. :0 Min. :0 Min. :0.1625 Min. :0.1238 Min. :0.1299
## 1st Qu.:0 1st Qu.:0 1st Qu.:0.2820 1st Qu.:0.2673 1st Qu.:0.2614
## Median :0 Median :0 Median :0.3113 Median :0.2927 Median :0.3154
## Mean :0 Mean :0 Mean :0.3093 Mean :0.2903 Mean :0.3124
## 3rd Qu.:0 3rd Qu.:0 3rd Qu.:0.3376 3rd Qu.:0.3178 3rd Qu.:0.3712
## Max. :0 Max. :0 Max. :0.4165 Max. :0.5531 Max. :0.4461
## ravg delta dHmix ENavg
## Min. :0.06376 Min. :0.2111 Min. :0.5000 Min. :0.09975
## 1st Qu.:0.85670 1st Qu.:0.9683 1st Qu.:0.9017 1st Qu.:0.13110
## Median :1.04304 Median :1.0323 Median :0.9904 Median :0.15128
## Mean :1.04373 Mean :1.0105 Mean :1.0249 Mean :0.16105
## 3rd Qu.:1.25412 3rd Qu.:1.0776 3rd Qu.:1.2392 3rd Qu.:0.17554
## Max. :1.62254 Max. :1.1404 Max. :1.3714 Max. :0.29795
## dEN
## Min. :0.3657
## 1st Qu.:0.4775
## Median :0.5109
## Mean :0.5293
## 3rd Qu.:0.5838
## Max. :0.6964
결측치 확인
# Tabulate NA versus non-NA cells of the normalized training and validation
# tables.
table(is.na(sel_hardness_data_set_tr))
table(is.na(sel_hardness_data_set_val))
##
## FALSE
## 20000
##
## FALSE
## 2525
정규화된 box그래프
# Box plots of the normalized data: one figure for training, one for
# validation, each with one box per selected column.
sel_name_list <- names(sel_hardness_data_set_tr)

# The first column starts each figure.
nor_fig_tr <- plot_ly(
  y = sel_hardness_data_set_tr[, 1],
  type = "box",
  quartilemethod = "linear",
  name = sel_name_list[1]
)
nor_fig_val <- plot_ly(
  y = sel_hardness_data_set_val[, 1],
  type = "box",
  quartilemethod = "linear",
  name = sel_name_list[1]
)

# Every further column is appended to both figures as an extra trace.
for (col_idx in 2:ncol(sel_hardness_data_set_tr)) {
  nor_fig_tr <- add_trace(
    nor_fig_tr,
    y = sel_hardness_data_set_tr[, col_idx],
    type = "box",
    quartilemethod = "linear",
    name = sel_name_list[col_idx]
  )
  nor_fig_val <- add_trace(
    nor_fig_val,
    y = sel_hardness_data_set_val[, col_idx],
    type = "box",
    quartilemethod = "linear",
    name = sel_name_list[col_idx]
  )
}
nor_fig_tr
nor_fig_val
데이터셋을 행렬로 변환
# Split the normalized tables into model inputs and targets.
# Position 19 of the selected columns is Hardness (the target); it is removed
# from the inputs and used as a one-column target matrix.
hardness_col <- 19
x_train <- as.matrix(sel_hardness_data_set_tr[, -hardness_col])
y_train <- matrix(sel_hardness_data_set_tr$Hardness, ncol = 1)
x_val <- as.matrix(sel_hardness_data_set_val[, -hardness_col])
y_val <- matrix(sel_hardness_data_set_val$Hardness, ncol = 1)
랜덤값 고정
# Fix the R and the TensorFlow random seeds to the same value.
seed_value <- 7
set.seed(seed_value)
tf$random$set_seed(seed_value)
기본 파라미터
# Shared settings: activation used by the hidden layers and the batch size
# passed to predict().
act <- "selu"
batch.size <- 32
모델생성
- 단순한 Dual_attention구조의 모델을 사용
# Build the attention model. Layers are created in the same order as the
# data flows so that automatically generated layer names stay stable.
k_clear_session()

n_features <- ncol(x_train)

# Input: one value per column of x_train.
input <- layer_input(shape = n_features, name = "input")

# Attention scores: a dense layer with one unit per feature, passed through
# a softmax.
att_score_dense <- layer_dense(input, units = n_features, name = "att_score")
att_layer <- layer_activation_softmax(att_score_dense)

# First attention: element-wise product of the input and the scores.
mul_layer <- layer_multiply(inputs = list(input, att_layer))

# Hidden stack: 8x -> 4x -> 1x the number of features.
hidden_1 <- layer_dense(
  mul_layer,
  units = n_features * 8, activation = act, name = "hidden_1"
)
hidden_2 <- layer_dense(
  hidden_1,
  units = n_features * 4, activation = act, name = "hidden_2"
)
hidden <- layer_dense(
  hidden_2,
  units = n_features, activation = act, name = "hidden_3"
)

# Second attention: the same scores re-weight the last hidden layer.
output_att <- layer_multiply(inputs = list(hidden, att_layer))

# Single linear output unit.
output <- layer_dense(output_att, units = 1, name = "output")

simple_att_model <- keras_model(
  inputs = input,
  outputs = output,
  name = "simple_att_model"
)
summary(simple_att_model)
## Model: "simple_att_model"
## ________________________________________________________________________________
## Layer (type) Output Shape Param # Connected to
## ================================================================================
## input (InputLayer) [(None, 24)] 0 []
## att_score (Dense) (None, 24) 600 ['input[0][0]']
## softmax (Softmax) (None, 24) 0 ['att_score[0][0]']
## multiply (Multiply) (None, 24) 0 ['input[0][0]',
## 'softmax[0][0]']
## hidden_1 (Dense) (None, 192) 4800 ['multiply[0][0]']
## hidden_2 (Dense) (None, 96) 18528 ['hidden_1[0][0]']
## hidden_3 (Dense) (None, 24) 2328 ['hidden_2[0][0]']
## multiply_1 (Multiply) (None, 24) 0 ['hidden_3[0][0]',
## 'softmax[0][0]']
## output (Dense) (None, 1) 25 ['multiply_1[0][0]']
## ================================================================================
## Total params: 26,281
## Trainable params: 26,281
## Non-trainable params: 0
## ________________________________________________________________________________
모델 plot
# Plot the model object (the resulting figure is not part of this text output).
plot(simple_att_model)

lr 스케줄러 함수 정의
# Learning-rate schedule: shrink the incoming rate by 0.1 % on every call.
#   epoch: epoch index supplied by the caller (not used in the computation).
#   lr:    current learning rate.
# Returns `lr` multiplied by 0.999.
lr_schedule <- function(epoch, lr) {
  decay_factor <- 0.999
  lr * decay_factor
}
# Wrap the schedule function in a Keras learning-rate-scheduler callback.
lr_scheduler <- callback_learning_rate_scheduler(
  schedule = lr_schedule
)
early stopping 설정
# Early-stopping callback: monitors the validation loss with a patience of
# 200 and asks Keras to restore the best weights when training stops.
early_stopping <- callback_early_stopping(
  monitor = "val_loss",
  patience = 200L,
  restore_best_weights = TRUE
)
모델 훈련
loss 그래프 확인
# Training and validation loss per epoch.
# NOTE(review): this reads the history attached to the model object, so the
# model must already have been compiled and fitted; that training code is not
# shown in this document.
loss_history <- simple_att_model$history$history
# seq_along() gives an empty index for an empty history (1:length() would
# yield c(1, 0)).
epoch_idx <- seq_along(loss_history$loss)
# "lines" is the valid plotly scatter mode flag; "line" is not one of the
# documented flags ("lines", "markers", "text").
plot_ly(
  x = epoch_idx, y = loss_history$loss,
  type = "scatter", mode = "lines", name = "tr_loss"
) %>%
  add_trace(
    x = epoch_idx, y = loss_history$val_loss,
    type = "scatter", mode = "lines", name = "val_loss"
  )
모델의 R2(결정계수)계산, SSE/SST
# Model output for the training inputs (normalized Hardness scale).
att_model_fitted_value <- predict(simple_att_model, x_train, batch_size = batch.size)
## 25/25 - 0s - 94ms/epoch - 4ms/step
# Coefficient of determination on the training set: R^2 = 1 - SSE / SST.
#   SST: total sum of squares of the targets around their mean.
#   SSE: residual sum of squares between targets and fitted values.
# The previous version divided sum((fitted - mean(y))^2) by SST; that ratio
# equals R^2 only for a least-squares linear fit with an intercept, so for
# this network it could over- or under-state the fit (and can exceed 1).
SST <- sum((y_train - mean(y_train))^2)
SSE <- sum((y_train - att_model_fitted_value)^2)
print(1 - SSE / SST)
## [1] 0.8470726
모델이 훈련시 어떤 변수에 가중치를 주었는지 확인
# Sub-model that maps the model input to the softmax attention output, used
# to read out the attention scores for the validation inputs.
att_score_out <- keras_model(inputs = input, outputs = att_layer)
att_score <- predict(att_score_out, x_val, batch_size = batch.size)
## 4/4 - 0s - 36ms/epoch - 9ms/step
# Store the scores as a data frame whose columns carry the input feature
# names (all selected columns except position 19, Hardness).
df_att_score <- data.frame(att_score)
names(df_att_score) <- sel_name_list[-19]
개별 데이터의 attention score 확인
# Draw four row indices of the attention-score matrix at random and show them.
row_num <- sample(1:nrow(att_score), size = 4)
row_num
## [1] 42 83 31 92
# One bar chart per sampled row; the factor levels keep the features in
# their original column order on the x axis.
feature_axis <- factor(sel_name_list[-19], levels = sel_name_list[-19])
att_sc_list <- lapply(row_num, function(row_idx) {
  plot_ly(x = feature_axis, y = att_score[row_idx, ], type = "bar")
})
# Arrange the four charts in a 2 x 2 grid.
subplot(
  att_sc_list[[1]], att_sc_list[[2]],
  att_sc_list[[3]], att_sc_list[[4]],
  nrows = 2
)
전체 데이터의 attention score 확인
# Scatter every column of `graph_data` against its row index in one figure.
#   graph_data: object passed through data.frame(); each resulting column
#               becomes one marker trace named after that column.
# Returns the plotly object holding all traces.
View_graph <- function(graph_data) {
  graph_data <- data.frame(graph_data)
  row_idx <- 1:nrow(graph_data)
  col_names <- names(graph_data)

  # Values of column k as a plain vector.
  column_values <- function(k) as.matrix(graph_data[, k])[, 1]

  # The first column creates the figure ...
  pl_graph <- plot_ly(
    x = row_idx, y = column_values(1),
    type = "scatter", mode = "markers", name = col_names[1]
  )
  # ... and every remaining column (none for a single-column input) is
  # appended as an additional trace.
  for (k in seq_len(ncol(graph_data))[-1]) {
    pl_graph <- add_trace(
      pl_graph,
      x = row_idx, y = column_values(k),
      type = "scatter", mode = "markers", name = col_names[k]
    )
  }
  pl_graph
}
# Scatter the attention score of every feature across all validation rows.
att_score_fig <- View_graph(df_att_score)
att_score_fig
모델 예측
- 테스트 데이터로 예측해야 하나 따로 분류하지 않아 검증 데이터로
대체합니다.
# Model output for the validation inputs (normalized Hardness scale).
prediction <- predict(simple_att_model, x_val, batch_size = batch.size)
## 4/4 - 0s - 58ms/epoch - 14ms/step
결과값의 역 정규화
# Map the normalized targets and predictions back to the original Hardness
# scale. Hardness was scaled with the training minimum and maximum as
#   x_norm = (x - min) / (max - min)
# so the inverse is
#   x = x_norm * (max - min) + min.
# The previous expression, (x_norm + min) * (max - min), added the offset
# before rescaling and therefore returned values on the wrong scale.
max_value <- max(hardness_data_set_tr$Hardness)
min_value <- min(hardness_data_set_tr$Hardness)
y_val <- y_val * (max_value - min_value) + min_value
prediction <- prediction * (max_value - min_value) + min_value
histogram 그래프 확인
# Overlaid, semi-transparent histograms of the actual and the predicted
# values (first column of each matrix).
fig_hist <- plot_ly(alpha = 0.6) %>%
  add_histogram(x = y_val[, 1], name = "y") %>%
  add_histogram(x = prediction[, 1], name = "prediction") %>%
  layout(barmode = "overlay")
fig_hist
실제값과 예측값의 점 그래프 확인
# Marker plot of actual and predicted values against the validation row index.
sample_idx <- 1:nrow(y_val)
scatter_fig <- plot_ly(
  x = sample_idx, y = y_val,
  type = "scatter", mode = "markers", name = "y"
)
scatter_fig <- add_trace(
  scatter_fig,
  x = sample_idx, y = prediction,
  type = "scatter", mode = "markers", name = "prediction"
)
scatter_fig